R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

rm(list = ls(all=TRUE)) # clear global environment
library(tidyr) # data loading
library(dplyr) # dataframe manipulation
library(stringr) # string manipulation
library(ggplot2) # plotting

path <- file.path("C:","Users","Paulina-laptop","Desktop","R_Programming","RLadies_workshop")

setwd(path)
sprintf("Current directory: %s", getwd())
## [1] "Current directory: C:/Users/Paulina-laptop/Desktop/R_Programming/RLadies_workshop"

Data loading & preparation

Let’s load some data first

wells <- read.csv(unzip("BCOGC_data/prod_csv.zip", "zone_prd_2016_to_present.csv"), skip = 1)

head(wells)
##   Wa_num Compltn_event_seq Prod_period              UWI Area_code Formtn_code
## 1     29                 0      201601 100142108318W600      3600        6200
## 2     29                 0      201603 100142108318W600      3600        6200
## 3     29                 0      201611 100142108318W600      3600        6200
## 4     29                 0      201612 100142108318W600      3600        6200
## 5     29                 0      201701 100142108318W600      3600        6200
## 6     29                 0      201702 100142108318W600      3600        6200
##   Pool_seq Gas_prod_vol..e3m3. Oil_prod_vol..m3. Water_prod_vol..m3.
## 1        A               107.7                 0                 2.3
## 2        A                45.2                 0                 1.7
## 3        A               166.2                 0                 5.0
## 4        A               159.2                 0                 2.6
## 5        A               133.5                 0                 3.7
## 6        A                95.8                 0                 4.1
##   Cond_prod_vol..m3. Prod_days. Gas_prod_cum..e3m3. Oil_prod_cum..m3.
## 1                0.5       31.0               107.7                 0
## 2                0.3       13.0               152.9                 0
## 3                1.3       22.0               319.1                 0
## 4                0.5       31.0               478.3                 0
## 5                0.6       31.0               611.8                 0
## 6                0.5       27.8               707.6                 0
##   Water_prod_cum..m3. Cond_prod_cum..m3. Project_code
## 1                 2.3                0.5            0
## 2                 4.0                0.8            0
## 3                 9.0                2.1            0
## 4                11.6                2.6            0
## 5                15.3                3.2            0
## 6                19.4                3.7            0

Column names in the original file have inconvenient interpunction, which can be easily fixed:

names(wells) <- str_replace_all(names(wells), c(" " = "" , 
                                                "," = "",
                                                "\\("="_",
                                                "\\)"="",
                                                "\\.."="_",
                                                "\\."=""
                                                ))
names(wells)
##  [1] "Wa_num"            "Compltn_event_seq" "Prod_period"      
##  [4] "UWI"               "Area_code"         "Formtn_code"      
##  [7] "Pool_seq"          "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"  
## [10] "Water_prod_vol_m3" "Cond_prod_vol_m3"  "Prod_days"        
## [13] "Gas_prod_cum_e3m3" "Oil_prod_cum_m3"   "Water_prod_cum_m3"
## [16] "Cond_prod_cum_m3"  "Project_code"
wells <- subset(wells, select=c(Wa_num, Prod_period, Prod_days, Area_code, Formtn_code, Gas_prod_vol_e3m3, Oil_prod_vol_m3, Water_prod_vol_m3, Cond_prod_vol_m3)) # select only useful columns

area_codes <- read.table("BCOGC_data/ogc_area_codes.csv", 
                         sep = ",", skip = 1, header = TRUE, stringsAsFactors = FALSE)
head(area_codes, 3)
##   Area.Code    Area.Name
## 1        50       ADSETT
## 2       100      AIRPORT
## 3       200 AITKEN CREEK
# rename so that we have the same name of Area_code and Area_name in all columns 
area_codes <- rename(area_codes, Area_code = Area.Code, Area_name = Area.Name)

formation_codes <- read.table("BCOGC_data/ogc_formation_codes.csv", 
                              sep = ",", skip = 1, header = TRUE, stringsAsFactors = FALSE)
head(formation_codes, 3)
##   Formation.Code        Formation.Name
## 1           4610 A MARKER/BASE OF LIME
## 2           2703          AITKEN CREEK
## 3           9922              ALDRIDGE
formation_codes <- rename(formation_codes, Formtn_code = Formation.Code, Formtn_name = Formation.Name)

Merge dataframes to create 2 additional columns with area and name as the wells file contains only its numerical codes.

wells <- merge(wells, area_codes, by = 'Area_code')
wells <- merge(wells, formation_codes, by = 'Formtn_code')

# we don't need them anymore in memory and dataframe
rm(area_codes, formation_codes) # remove form memory
wells <- subset(wells, select = -c(Area_code, Formtn_code)) # remove from dataframe

First glimpse on the data. Notice newly created columns, Area_name and Formtn_name at the end.

names(wells)
## [1] "Wa_num"            "Prod_period"       "Prod_days"        
## [4] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"   "Water_prod_vol_m3"
## [7] "Cond_prod_vol_m3"  "Area_name"         "Formtn_name"
str(wells)
## 'data.frame':    517518 obs. of  9 variables:
##  $ Wa_num           : int  14893 25370 25370 26962 14893 14893 25371 25371 25371 26962 ...
##  $ Prod_period      : int  201809 202003 201901 201709 201805 202001 202011 201912 201805 201907 ...
##  $ Prod_days        : num  30 30.7 30.8 23 31 31 28.5 30.4 30.4 28 ...
##  $ Gas_prod_vol_e3m3: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Oil_prod_vol_m3  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Water_prod_vol_m3: num  91.4 1291 2528 1068 6948.7 ...
##  $ Cond_prod_vol_m3 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Area_name        : chr  "DESAN" "PEEJAY WEST" "PEEJAY WEST" "BEAVERTAIL" ...
##  $ Formtn_name      : chr  "QUATERNARY" "QUATERNARY" "QUATERNARY" "QUATERNARY" ...
summary(wells)
##      Wa_num       Prod_period       Prod_days     Gas_prod_vol_e3m3 
##  Min.   :   29   Min.   :201601   Min.   : 0.00   Min.   :     0.0  
##  1st Qu.:15183   1st Qu.:201704   1st Qu.:25.60   1st Qu.:    38.2  
##  Median :22328   Median :201807   Median :30.00   Median :   124.8  
##  Mean   :21074   Mean   :201810   Mean   :26.08   Mean   :   555.4  
##  3rd Qu.:28257   3rd Qu.:201910   3rd Qu.:31.00   3rd Qu.:   608.5  
##  Max.   :41055   Max.   :202101   Max.   :31.10   Max.   :119042.3  
##  Oil_prod_vol_m3   Water_prod_vol_m3 Cond_prod_vol_m3   Area_name        
##  Min.   :   0.00   Min.   :    0.0   Min.   :   0.00   Length:517518     
##  1st Qu.:   0.00   1st Qu.:    0.9   1st Qu.:   0.00   Class :character  
##  Median :   0.00   Median :    5.0   Median :   0.00   Mode  :character  
##  Mean   :  10.77   Mean   :  181.4   Mean   :  19.31                     
##  3rd Qu.:   0.00   3rd Qu.:   52.6   3rd Qu.:   1.00                     
##  Max.   :7089.30   Max.   :29177.7   Max.   :7424.00                     
##  Formtn_name       
##  Length:517518     
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Let's check number of formations in the file...
length(unique(wells$Formtn_name)) 
## [1] 92
# ...and list them
unique(wells$Formtn_name)
##  [1] "QUATERNARY"                   "CRETACEOUS"                  
##  [3] "CARDIUM SAND"                 "DOE CREEK"                   
##  [5] "DUNVEGAN"                     "BASE OF FISH SCALES"         
##  [7] "SCATTER"                      "PADDY"                       
##  [9] "PADDY- CADOTTE"               "CADOTTE"                     
## [11] "SPIRIT RIVER"                 "NOTIKEWIN"                   
## [13] "FALHER"                       "FALHER A"                    
## [15] "FALHER B"                     "FALHER C"                    
## [17] "FALHER D"                     "WILRICH"                     
## [19] "BLUESKY"                      "BASAL BLUESKY"               
## [21] "BLUESKY-GETHING"              "BLUESKY-GETHING-DETRITAL"    
## [23] "DETRITAL"                     "GETHING"                     
## [25] "LOWER GETHING"                "BASAL GETHING"               
## [27] "GETHING-BALDONNEL"            "CADOMIN"                     
## [29] "CHINKEH"                      "NIKANASSIN"                  
## [31] "DUNLEVY"                      "LOWER DUNLEVY"               
## [33] "ROCK CREEK"                   "NORDEGG"                     
## [35] "NORDEGG-BALDONNEL"            "PARDONET-BALDONNEL"          
## [37] "BALDONNEL"                    "BALDONNEL/UPPER CHARLIE LAKE"
## [39] "CHARLIE LAKE"                 "SIPHON"                      
## [41] "CECIL"                        "NANCY"                       
## [43] "FLATROCK"                     "BOUNDARY LAKE"               
## [45] "YELLOW MARKER"                "COPLIN"                      
## [47] "MICA"                         "BLUEBERRY"                   
## [49] "INGA"                         "NORTH PINE"                  
## [51] "BEAR FLAT"                    "PINGEL"                      
## [53] "LIMESTONE A BED"              "A MARKER/BASE OF LIME"       
## [55] "LOWER CHARLIE LAKE SANDS"     "ARTEX"                       
## [57] "ARTEX/HALFWAY"                "UPPER HALFWAY"               
## [59] "HALFWAY"                      "LOWER HALFWAY"               
## [61] "DOIG"                         "DOIG PHOSPHATE BEDS"         
## [63] "BLUESKY-GETHING-MONTNEY"      "LOWER CHARLIE LAKE/MONTNEY"  
## [65] "DOIG PHOSPHATE-MONTNEY"       "MONTNEY"                     
## [67] "BELLOY"                       "BELCOURT"                    
## [69] "BELCOURT-TAYLOR FLAT"         "BELLOY-KISKATINAW"           
## [71] "TAYLOR FLAT"                  "MISSISSIPPIAN"               
## [73] "KISKATINAW"                   "LOWER KISKATINAW"            
## [75] "BASAL KISKATINAW"             "UPPER DEBOLT"                
## [77] "DEBOLT"                       "ELKTON"                      
## [79] "SHUNDA"                       "PEKISKO"                     
## [81] "BANFF"                        "BESA RIVER"                  
## [83] "WABAMUN"                      "TETCHO"                      
## [85] "KAKISA"                       "JEAN MARIE"                  
## [87] "MUSKWA"                       "MUSKWA-OTTER PARK"           
## [89] "SLAVE POINT"                  "SULPHUR POINT"               
## [91] "EVIE"                         "PINE POINT"

Key dplyr functions:

filter

slave_pnt <- filter(wells, Formtn_name == "SLAVE POINT" )
unique(slave_pnt$Formtn_name)
## [1] "SLAVE POINT"
muskwa <- filter(wells, Formtn_name %in% c("MUSKWA", "MUSKWA-OTTER PARK"))
unique(muskwa$Formtn_name)
## [1] "MUSKWA"            "MUSKWA-OTTER PARK"
# same as
muskwa <- filter(wells, Formtn_name == "MUSKWA" | Formtn_name == "MUSKWA-OTTER PARK")
unique(muskwa$Formtn_name)
## [1] "MUSKWA"            "MUSKWA-OTTER PARK"

arrange - sort dataframe

Wa_num_sorted <- arrange(wells, Wa_num) # ascending
head(Wa_num_sorted)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1     29      201703      30.0              73.0               0
## 2     29      201704      24.0              64.7               0
## 3     29      201706       3.0               7.2               0
## 4     29      201707      15.1              53.8               0
## 5     29      201611      22.0             166.2               0
## 6     29      201601      31.0             107.7               0
##   Water_prod_vol_m3 Cond_prod_vol_m3    Area_name Formtn_name
## 1               2.8              0.3 FORT ST JOHN      BELLOY
## 2               1.9              0.3 FORT ST JOHN      BELLOY
## 3               0.3              0.1 FORT ST JOHN      BELLOY
## 4               2.0              0.5 FORT ST JOHN      BELLOY
## 5               5.0              1.3 FORT ST JOHN      BELLOY
## 6               2.3              0.5 FORT ST JOHN      BELLOY
Wa_num_sorted <- arrange(wells, desc(Wa_num)) # descending
head(Wa_num_sorted)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1  41055      202012      30.6           15051.0               0
## 2  41055      202101      30.8           12528.5               0
## 3  41055      202011       5.5            1293.3               0
## 4  41054      202011       5.6            1658.9               0
## 5  41054      202012      30.8           19143.3               0
## 6  41054      202101      30.8           17839.7               0
##   Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name
## 1            3630.5            950.8  HERITAGE     MONTNEY
## 2            2087.3            770.4  HERITAGE     MONTNEY
## 3            3396.8            110.8  HERITAGE     MONTNEY
## 4            3444.7            317.9  HERITAGE     MONTNEY
## 5            4204.7           1691.9  HERITAGE     MONTNEY
## 6            2476.8           1170.2  HERITAGE     MONTNEY
Wa_num_sorted <- arrange(wells, desc(Wa_num), Prod_period) # by multiple conditions
head(Wa_num_sorted)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1  41055      202011       5.5            1293.3               0
## 2  41055      202012      30.6           15051.0               0
## 3  41055      202101      30.8           12528.5               0
## 4  41054      202011       5.6            1658.9               0
## 5  41054      202012      30.8           19143.3               0
## 6  41054      202101      30.8           17839.7               0
##   Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name
## 1            3396.8            110.8  HERITAGE     MONTNEY
## 2            3630.5            950.8  HERITAGE     MONTNEY
## 3            2087.3            770.4  HERITAGE     MONTNEY
## 4            3444.7            317.9  HERITAGE     MONTNEY
## 5            4204.7           1691.9  HERITAGE     MONTNEY
## 6            2476.8           1170.2  HERITAGE     MONTNEY
rm(Wa_num_sorted)

select - select dataframe by columns

names(wells)
## [1] "Wa_num"            "Prod_period"       "Prod_days"        
## [4] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"   "Water_prod_vol_m3"
## [7] "Cond_prod_vol_m3"  "Area_name"         "Formtn_name"
head(select(wells, Area_name, Formtn_name))
##     Area_name Formtn_name
## 1       DESAN  QUATERNARY
## 2 PEEJAY WEST  QUATERNARY
## 3 PEEJAY WEST  QUATERNARY
## 4  BEAVERTAIL  QUATERNARY
## 5       DESAN  QUATERNARY
## 6       DESAN  QUATERNARY
# instead of listing columns separately, we can list multiple columns next to each other using ":"
head(select(wells, Gas_prod_vol_e3m3:Cond_prod_vol_m3, Area_name, Formtn_name))
##   Gas_prod_vol_e3m3 Oil_prod_vol_m3 Water_prod_vol_m3 Cond_prod_vol_m3
## 1                 0               0              91.4                0
## 2                 0               0            1291.0                0
## 3                 0               0            2528.0                0
## 4                 0               0            1068.0                0
## 5                 0               0            6948.7                0
## 6                 0               0             109.3                0
##     Area_name Formtn_name
## 1       DESAN  QUATERNARY
## 2 PEEJAY WEST  QUATERNARY
## 3 PEEJAY WEST  QUATERNARY
## 4  BEAVERTAIL  QUATERNARY
## 5       DESAN  QUATERNARY
## 6       DESAN  QUATERNARY

The same wells have been listed multiple times as there are various production periods each year. Let’s create new column Prod_year to make more general summary.

mutate - create new columns

head(wells$Prod_period)
## [1] 201809 202003 201901 201709 201805 202001
wells <- wells %>%
  mutate(Prod_year = substr(Prod_period, 1, 4),
         Prod_month = substr(Prod_period, 5, 6),
         Prod_ym = paste(Prod_year, Prod_month, sep="-")) #to create time "points" only from available dates

head(wells)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1  14893      201809      30.0                 0               0
## 2  25370      202003      30.7                 0               0
## 3  25370      201901      30.8                 0               0
## 4  26962      201709      23.0                 0               0
## 5  14893      201805      31.0                 0               0
## 6  14893      202001      31.0                 0               0
##   Water_prod_vol_m3 Cond_prod_vol_m3   Area_name Formtn_name Prod_year
## 1              91.4                0       DESAN  QUATERNARY      2018
## 2            1291.0                0 PEEJAY WEST  QUATERNARY      2020
## 3            2528.0                0 PEEJAY WEST  QUATERNARY      2019
## 4            1068.0                0  BEAVERTAIL  QUATERNARY      2017
## 5            6948.7                0       DESAN  QUATERNARY      2018
## 6             109.3                0       DESAN  QUATERNARY      2020
##   Prod_month Prod_ym
## 1         09 2018-09
## 2         03 2020-03
## 3         01 2019-01
## 4         09 2017-09
## 5         05 2018-05
## 6         01 2020-01
class(wells$Prod_ym)
## [1] "character"

summarize

head(wells$Prod_period)
## [1] 201809 202003 201901 201709 201805 202001
prod_days_by_area <- wells %>%
  group_by(Area_name) %>%
  summarize(
    total_prod_days = sum(Prod_days)
    )

head(prod_days_by_area)
## # A tibble: 6 x 2
##   Area_name          total_prod_days
##   <chr>                        <dbl>
## 1 ADSETT                       6385.
## 2 AIRPORT                       141.
## 3 AITKEN CREEK                 4164.
## 4 AITKEN CREEK NORTH           1885.
## 5 ALTARES                     14861.
## 6 BEAVERDAM                    6439.
prod_days_by_area %>% filter(total_prod_days > 500000) %>% 
  ggplot(aes(x = Area_name, y = total_prod_days)) + 
  geom_col() + 
  ggtitle("Total Production Days for most productive areas", ) +
  ylab("Production days") + 
  xlab("Area")

histograms

Histograms are commonly used to visualize the distribution of our data. The bars of histogram correspond to the number of observations within specific bin. Note: it’s recommended to check different bin sizes to find the most optimal bin for the data.

wells %>% filter(Cond_prod_vol_m3 > 0) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(Cond_prod_vol_m3))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# let's limit our data to the wells which had production (100-1000). Here, we use default number of bins (30)
wells %>% filter(Cond_prod_vol_m3 > 100, Cond_prod_vol_m3 < 1000) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(Cond_prod_vol_m3))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# increase number of bins to 200 to see the difference
wells %>% filter(Cond_prod_vol_m3 > 100, Cond_prod_vol_m3 < 1000) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(Cond_prod_vol_m3), bins=200) + 
  ggtitle("Distribution of condensate production per production period") + 
  xlab(expression("Condensate production per production period m"^3))

tables

Tables are a nice way to present numerical data and identify the trends. Let’s analyze the water production in different formations.

# Using tables we can see how many wells we have for each formation
wells %>% group_by(Formtn_name) %>%
  summarize(
    count_wells = n_distinct(Wa_num),#count unique well numbers (multiple prod. periods)
    mean_wtr_prod = mean(Water_prod_vol_m3)
  ) %>% arrange(desc(count_wells))
## # A tibble: 92 x 3
##    Formtn_name             count_wells mean_wtr_prod
##    <chr>                         <int>         <dbl>
##  1 MONTNEY                        4745        150.  
##  2 JEAN MARIE                     1531          3.21
##  3 HALFWAY                         659         62.2 
##  4 BLUESKY                         589       2038.  
##  5 CADOMIN                         535         10.7 
##  6 NOTIKEWIN                       406          5.03
##  7 BALDONNEL                       356         62.3 
##  8 BLUESKY-GETHING-MONTNEY         354          1.34
##  9 GETHING                         330          2.33
## 10 BOUNDARY LAKE                   278        483.  
## # ... with 82 more rows
# we can find which areas / formations are suspected to have more water disposal wells than HC production wells
wells %>% group_by(Area_name) %>%
  summarize(
    count_wells = n_distinct(Wa_num), 
    mean_wtr_prod = mean(Water_prod_vol_m3) 
  ) %>% arrange(desc(mean_wtr_prod))
## # A tibble: 169 x 3
##    Area_name   count_wells mean_wtr_prod
##    <chr>             <int>         <dbl>
##  1 HAY RIVER           211         4481.
##  2 KLUA                  1         4328.
##  3 SUNRISE              14         2990.
##  4 TOWER LAKE            7         2458.
##  5 CLARKE LAKE          38         2145.
##  6 LAPP                  9         1699.
##  7 GOPHER                1         1527.
##  8 WOODRUSH              8          867.
##  9 TWO RIVERS            9          708.
## 10 LIARD                 6          700.
## # ... with 159 more rows

scatterplots

Scatterplots are useful when we want to show the relationship between 2 variables.

We can convert our dataframe to the long format to have all types of hydrocarbon production in one column using gather function. Picture below visualizes this:

# let's look again on the selected columns of original dataframe
wells %>% select(Water_prod_vol_m3, Gas_prod_vol_e3m3, Oil_prod_vol_m3, Cond_prod_vol_m3) %>% 
  head(3)
##   Water_prod_vol_m3 Gas_prod_vol_e3m3 Oil_prod_vol_m3 Cond_prod_vol_m3
## 1              91.4                 0               0                0
## 2            1291.0                 0               0                0
## 3            2528.0                 0               0                0
# we will use gather function to create new column "Prod_type" which will contain the "Prod_volume" value for gas, oil and condensate production. We assign the results to new dataframe.
prod_df <- wells %>% 
  gather(key = "Prod_type",
         value = "Prod_volume",
         c(Gas_prod_vol_e3m3, Oil_prod_vol_m3, Cond_prod_vol_m3)) %>% 
  select(Water_prod_vol_m3, Prod_volume, Prod_year, Prod_ym, Prod_type, Prod_days, Formtn_name)

prod_df %>% select(Water_prod_vol_m3, Prod_type) %>% head(3)
##   Water_prod_vol_m3         Prod_type
## 1              91.4 Gas_prod_vol_e3m3
## 2            1291.0 Gas_prod_vol_e3m3
## 3            2528.0 Gas_prod_vol_e3m3
# indeed, we have production type in Prod_type column!
unique(prod_df$Prod_type)
## [1] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"   "Cond_prod_vol_m3"
# let's visualize that quickly
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
  geom_point()

# add some axis limits
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
  geom_point() + 
  xlim(0,10000) + 
  ylim(0,25000)
## Warning: Removed 45 rows containing missing values (geom_point).

# visualize each type of production separately
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
  geom_point() + 
  facet_wrap(~Prod_type)

# create indepentent y axis for each chart, since the production is in different units.
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type, alpha = 0.3)) +
  geom_point() + 
  facet_wrap(~Prod_type, scales = "free") # scales free => not fixed limits of yaxis

# I expect that Prod_volume > 0 will indicate the SWD disposal wells, let's remove those from the plot
prod_df %>% 
  filter(Prod_year == 2021, Prod_volume > 0) %>% # Prod_colume > 0 to remove disposal wells 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type, alpha = 0.3)) +
  geom_point() + 
  ggtitle("Water production per HC type") +
  facet_wrap(~Prod_type, scales = "free") +
  xlim(0,5000) 
## Warning: Removed 126 rows containing missing values (geom_point).

bar charts

Bar charts can be created in using two geometries: geom_col - by default height of the bars represent value in the data geom_bar - by default counts number of cases in each group

### Let's start with geom_col to visualize the production per period...
wells %>% subset(Prod_year > 2000) %>% 
  group_by(Prod_ym) %>% 
  summarise(
    prod_per_period = sum(Cond_prod_vol_m3)
  ) %>% 
  ggplot(aes(x = Prod_ym, y = prod_per_period)) + 
  theme(axis.text.x = element_text(angle = 90)) + 
  geom_col()

### ...and per year. Which one is more useful / interesting?
wells %>% 
  subset(Prod_year > 2000) %>%
  group_by(Prod_year) %>% 
  summarise(
    prod_per_year = sum(Cond_prod_vol_m3)
  ) %>% 
  ggplot(aes(x = Prod_year, y = prod_per_year)) + 
  geom_col()

class(wells$Prod_period)
## [1] "integer"

line graphs

Line graphs are useful to visualize the change in time. We can visualize the trends in production for example.

# reminder: available formation names in our dataframe 
unique(wells$Formtn_name)[1:20]
##  [1] "QUATERNARY"          "CRETACEOUS"          "CARDIUM SAND"       
##  [4] "DOE CREEK"           "DUNVEGAN"            "BASE OF FISH SCALES"
##  [7] "SCATTER"             "PADDY"               "PADDY- CADOTTE"     
## [10] "CADOTTE"             "SPIRIT RIVER"        "NOTIKEWIN"          
## [13] "FALHER"              "FALHER A"            "FALHER B"           
## [16] "FALHER C"            "FALHER D"            "WILRICH"            
## [19] "BLUESKY"             "BASAL BLUESKY"
# let's visualize only one formation first
formtnName = "BLUESKY"
prod_df %>%
  filter(Formtn_name == formtnName) %>% 
  group_by(Prod_ym, Formtn_name, Prod_type) %>% 
  summarise(
    total_prod = sum(Prod_volume)
    ) %>% 
  ungroup() %>% 
  ggplot(aes(x = Prod_ym, y = total_prod, group=Formtn_name, col=Prod_type)) +
  geom_line() + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01",
                              "2019-01","2020-01", "2021-01")) +
  facet_grid(Prod_type ~ ., scales = "free") + 
  ggtitle(paste("Production per year for formation", formtnName))
## `summarise()` has grouped output by 'Prod_ym', 'Formtn_name'. You can override using the `.groups` argument.

#  and now production from all formations in one chart
prod_df %>%
  group_by(Prod_ym, Prod_type) %>% 
  summarise(
    total_prod = sum(Prod_volume)
    ) %>% 
  ungroup() %>% 
  ggplot(aes(x = Prod_ym, y = total_prod, group=1, col=Prod_type)) +
  geom_line() + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01", 
                              "2019-01","2020-01", "2021-01")) +
  facet_grid(Prod_type ~ ., scales = "free") +
  ggtitle("Total production per year")
## `summarise()` has grouped output by 'Prod_ym'. You can override using the `.groups` argument.

We can apply simple trick to order the formations from the highest production to the lowest (using mutate function).

We save most productive formations separately, they will be useful later to plot production in time.

# first let's create dataframe with summed produced values by formation
prod_by_formation <- wells %>% 
  group_by(Formtn_name) %>%
  summarise(
    oil_prod_total = sum(Oil_prod_vol_m3),
    gas_prod_total = sum(Gas_prod_vol_e3m3),
    cond_prod_total = sum(Cond_prod_vol_m3), 
    water_prod_total = sum(Water_prod_vol_m3)
    ) %>%
  ungroup()

# find 5 most productive gas formation
most_gas <- prod_by_formation %>% 
  arrange(desc(gas_prod_total)) %>% 
  head(5) 

ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
  geom_col(fill='#ff6347') # change color to red

# we can use a trick to order the bars according to production value (using mutate function)
most_gas <- most_gas %>%  
  mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))

ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
  geom_col(fill='#ff6347') # change color to red

# same for oil production
most_oil <- prod_by_formation %>% 
  arrange(desc(oil_prod_total)) %>% 
  head(5) %>% 
  mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))

ggplot(most_oil, aes(x = Formtn_name, y = oil_prod_total)) +
  geom_col()

# and for condensate production
most_cond <- prod_by_formation %>% 
  arrange(desc(cond_prod_total)) %>% 
  head(5) %>% 
  mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))

ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
  geom_col(fill="#6495ed")

We can also visualize the same set of formations. To define the set, we will take 5 most productive gas, oil and condensate formations.

# define 5 most productive formations
best_formations <- unique(most_oil$Formtn_name,
                            most_gas$Formtn_name,
                            most_cond$Formtn_name)

# plot oil production
wells %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = Oil_prod_vol_m3, fill=Formtn_name)) +
    geom_col() +
  ggtitle("Total oil production by formation") +
  xlab("Formation") +
  ylab("Oil production [m3]")

# plot gas production
prod_by_formation %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = gas_prod_total, fill=Formtn_name)) +
    geom_col() + 
  ggtitle("Total gas production by formation") + 
  xlab("Formation") + 
  ylab("Gas production [e3m3]")

# plot condensate production
prod_by_formation %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = cond_prod_total, fill=Formtn_name)) +
    geom_col() + 
  ggtitle("Total condensate production by formation") + 
  xlab("Formation") + 
  ylab("Condensate production [m3]")

# plot water production
prod_by_formation %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = water_prod_total, fill=Formtn_name)) +
    geom_col() + 
  ggtitle("Total water production by formation") + 
  xlab("Formation") + 
  ylab("Water production [m3]")

{r eval=TRUE, echo=FALSE} # gas_prod <- prod_by_formation[prod_by_formation$gas_prod_total > 0,] # oil_prod <- prod_by_formation[prod_by_formation$oil_prod_total > 0,] # cond_prod <- prod_by_formation[prod_by_formation$cond_prod_total > 0,] #

FACETS - subplots according to some variable

let’s see how facets can increase the visibility of our charts ()

#library(tidyverse)

# here we plot the count of wells per year for all formations
ggplot(data = wells, aes(x = Prod_year)) +
  geom_bar()

# filter wells for most productive formations and most recent wells. Add some colors (fill) selected manually (scale_fill_manual)
ggplot(filter(wells, Formtn_name %in% best_formations & Prod_year < 2021)) +
  geom_bar(aes(x = Formtn_name, fill=Formtn_name)) + 
  scale_fill_manual(values = c('#d0d1e6','#a6bddb','#67a9cf','#1c9099','#016c59')) + # taken e.g. from colorbrewer
  facet_wrap(~ Prod_year) + 
  theme(axis.text.x = element_blank(), # remove ticks #cosmetics
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank())

Bonus: Interactive area charts!

Making interactive plots in R is extremely easy. First, we need to create standard “static” chart and then feed it to plotly.

library(plotly)
## Warning: package 'plotly' was built under R version 4.0.4
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
head(prod_df)
##   Water_prod_vol_m3 Prod_volume Prod_year Prod_ym         Prod_type Prod_days
## 1              91.4           0      2018 2018-09 Gas_prod_vol_e3m3      30.0
## 2            1291.0           0      2020 2020-03 Gas_prod_vol_e3m3      30.7
## 3            2528.0           0      2019 2019-01 Gas_prod_vol_e3m3      30.8
## 4            1068.0           0      2017 2017-09 Gas_prod_vol_e3m3      23.0
## 5            6948.7           0      2018 2018-05 Gas_prod_vol_e3m3      31.0
## 6             109.3           0      2020 2020-01 Gas_prod_vol_e3m3      31.0
##   Formtn_name
## 1  QUATERNARY
## 2  QUATERNARY
## 3  QUATERNARY
## 4  QUATERNARY
## 5  QUATERNARY
## 6  QUATERNARY
# let's filter the data first and summarize the production
prod_per_period <- prod_df %>% 
  filter(Formtn_name %in% best_formations) %>% 
  group_by(Prod_ym, Prod_type, Formtn_name) %>% 
  summarise(
    prod_per_period = sum(Prod_volume)
  )
## `summarise()` has grouped output by 'Prod_ym', 'Prod_type'. You can override using the `.groups` argument.
# let's plot it according to the production type
p <- ggplot(prod_per_period, aes(x=Prod_ym, y=prod_per_period, group=Formtn_name, fill=Formtn_name)) + 
  geom_area(alpha=0.3) + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01", 
                              "2019-01","2020-01", "2021-01")) +
  facet_grid(Prod_type ~ ., scales = "free")

p <- ggplotly(p)
p
# That's nice but let's add some details, like text when we hover over charts. 
p <- ggplot(prod_per_period, aes(x=Prod_ym, y=prod_per_period, group=Prod_type, 
                                 text = paste("Formation:", Formtn_name, 
                                              "<br>Prod. type:", Prod_type), fill=Prod_type)) +
  geom_area(colour="#636363", alpha=0.3) + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01", 
                              "2019-01","2020-01", "2021-01")) + 
  facet_wrap(Formtn_name ~ ., scales = "free") + 
  ylab("") + 
  xlab("")

p <- ggplotly(p, tooltip = "text")
p

Visualize data on map!

Let’s move to another dataset provided by BCOGC to access the location of the wells and plot them together with geophysical lines, O&G pools and fields. O&G field => group of O&G pools

Here we will use geom_sf geometry and sf package, commonly used for geospatial data manipulation. We fill use following sf functions: st_as_sf() - convert foreign object to the sf object st_crs() - get coordinate system of the feature st_transform() - transform features (for example change coordinate system) st_union() - merge multiple features into one st_intersects() -

st_convex_hull() - create polygon around the points (minimum bounding area) st_within() - get features within other feature st_crop() - crop the feature

rm(list = ls(all=TRUE)) # clear global environment

path <- file.path("C:","Users","Paulina-laptop","Desktop","R_Programming","RLadies_workshop")

setwd(path)
sprintf("Current directory: %s", getwd())
## [1] "Current directory: C:/Users/Paulina-laptop/Desktop/R_Programming/RLadies_workshop"
library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
# load dplyr and ggplot2 in case this is the only chunk you want to run
library(dplyr) 
library(ggplot2) 

wells_coords <- read.csv(unzip("BCOGC_data/drill_csv.zip", "wells.csv"), skip = 1)  

head(wells_coords)#bigger file
##       Well.Surf.Loc                                               Well.Name
## 1 D- 080-H/092-G-03                    ROYAL   CITY NO. 1 D- 080-H/092-G-03
## 2 B- 050-D/082-G-01             CANADIAN   KOOTENAY NO. 1 B- 050-D/082-G-01
## 3 D- 055-A/082-G-02                   BORDER OILS   NO. 1 D- 055-A/082-G-02
## 4      13-11-081-17                           CANLIN   PINGEL  13-11-081-17
## 5 A- 065-E/093-I-16 CVE ENERGY ET AL  LONE MOUNTAIN NO. 1 A- 065-E/093-I-16
## 6 C- 048-D/103-G-05     STRATHCONA   QUEEN CHARLOTTE NO.1 C- 048-D/103-G-05
##   WA.Num Surf.Nad27.Lat Surf.Nad27.Long Surf.Nad83.Lat Surf.Nad83.Long
## 1      1       49084860       123064913       49085314       123065320
## 2      2       49020730       114294872       49035335       114497851
## 3      3       49025250       114331128       49047892       114554121
## 4      4             NA              NA       56004511       120331605
## 5      5       54530889       120254600       54530863       120255126
## 6      6       53172250       131590333       53171412       131575692
##   Surf.UTM.Zone.Num Surf.UTM83.Northng Surf.UTM83.Eastng Surf.North Surf.East
## 1                10            5443925          491629.8     -192.0     -71.3
## 2                11            5434400          682875.8         NA        NA
## 3                11            5435662          678718.6         NA        NA
## 4                10            6210173          652456.4     -201.2     221.3
## 5                10            6085099          664789.3      274.9    -285.3
## 6                 9            5908329          302312.1     -463.9    -107.9
##   Surf.Owner Ground.Elevtn Directional.Flag Surf.Ref.Corner Surf.Ref.Unit
## 1      CROWN           1.5                N              NE            80
## 2      CROWN            NA                N              NW            50
## 3      CROWN            NA                N              SW            55
## 4      CROWN         758.6                N              NW            NA
## 5      CROWN         971.6                N              SE            65
## 6      CROWN           8.3                N              NE            48
##   Surf.Ref.Block Surf.Ref.Map Surf.Ref.Lsd Surf.Ref.Sect Surf.Ref.Twp
## 1              H     092-G-03           NA            NA           NA
## 2              D     082-G-01           NA            NA           NA
## 3              A     082-G-02           NA            NA           NA
## 4                                       13            11           81
## 5              E     093-I-16           NA            NA           NA
## 6              D     103-G-05           NA            NA           NA
##   Surf.Ref.Range Surf.DLS.Exception Surf.Lsd Surf.Sect Surf.Twp Surf.Range
## 1             NA                          NA        NA       NA         NA
## 2             NA                          NA        NA       NA         NA
## 3             NA                          NA        NA       NA         NA
## 4             17                          13        11       81         17
## 5             NA                          NA        NA       NA         NA
## 6             NA                          NA        NA       NA         NA
##   Surf.Qtr.Unit Surf.NTS.Exception Surf.Unit Surf.Block Surf.Map Oper.Id
## 1             D                           80          H 092-G-03       0
## 2             B                           50          D 082-G-01       0
## 3             D                           55          A 082-G-02       0
## 4                                         NA                        1018
## 5             A                           65          E 093-I-16    1016
## 6             C                           48          D 103-G-05   11075
##   Oper.Abbrev Oper.Abbrev2 Optnl.Unit       Well.Area.Name Well.Name.Date
## 1       ROYAL                                   CITY NO. 1       19480610
## 2    CANADIAN                               KOOTENAY NO. 1       19490903
## 3 BORDER OILS                                        NO. 1       19480704
## 4      CANLIN                                       PINGEL       20171206
## 5  CVE ENERGY        ET AL             LONE MOUNTAIN NO. 1       20180607
## 6  STRATHCONA                         QUEEN CHARLOTTE NO.1       20200901
##   Special.Well.Class.Code Test.Hole
## 1                      NO         N
## 2                      NO         N
## 3                                 N
## 4                      NO         N
## 5                      NO         N
## 6                      NO         N
names(wells_coords)
##  [1] "Well.Surf.Loc"           "Well.Name"              
##  [3] "WA.Num"                  "Surf.Nad27.Lat"         
##  [5] "Surf.Nad27.Long"         "Surf.Nad83.Lat"         
##  [7] "Surf.Nad83.Long"         "Surf.UTM.Zone.Num"      
##  [9] "Surf.UTM83.Northng"      "Surf.UTM83.Eastng"      
## [11] "Surf.North"              "Surf.East"              
## [13] "Surf.Owner"              "Ground.Elevtn"          
## [15] "Directional.Flag"        "Surf.Ref.Corner"        
## [17] "Surf.Ref.Unit"           "Surf.Ref.Block"         
## [19] "Surf.Ref.Map"            "Surf.Ref.Lsd"           
## [21] "Surf.Ref.Sect"           "Surf.Ref.Twp"           
## [23] "Surf.Ref.Range"          "Surf.DLS.Exception"     
## [25] "Surf.Lsd"                "Surf.Sect"              
## [27] "Surf.Twp"                "Surf.Range"             
## [29] "Surf.Qtr.Unit"           "Surf.NTS.Exception"     
## [31] "Surf.Unit"               "Surf.Block"             
## [33] "Surf.Map"                "Oper.Id"                
## [35] "Oper.Abbrev"             "Oper.Abbrev2"           
## [37] "Optnl.Unit"              "Well.Area.Name"         
## [39] "Well.Name.Date"          "Special.Well.Class.Code"
## [41] "Test.Hole"
# subset dataframe
wells_coords <- wells_coords %>% select(Surf.UTM83.Northng, Surf.UTM83.Eastng, Surf.UTM.Zone.Num, Well.Area.Name, Oper.Abbrev, Surf.Owner)

# select only wells from the UNT10 and UTM11 zones
wells_coords <- wells_coords %>% filter(complete.cases(.), Surf.UTM.Zone.Num %in% c(10,11))
unique(wells_coords$Surf.UTM.Zone.Num)
## [1] 10 11
# create 2 geodataframes with crs values according to UTM zone
wells_pts_1 <- wells_coords %>% 
  filter(Surf.UTM.Zone.Num == 10) %>% 
  st_as_sf(coords = c("Surf.UTM83.Eastng", "Surf.UTM83.Northng"), crs = 26910) %>% 
  st_transform(4326) # want to use latlon coords

wells_pts_2 <- wells_coords %>% 
  filter(Surf.UTM.Zone.Num == 11) %>%  
  st_as_sf(coords = c("Surf.UTM83.Eastng", "Surf.UTM83.Northng"), crs = 26910) %>%
  st_transform(4326) # want to use latlon coords

# bind dataframe rows vertically and check the coordinate system
wells_pts <- rbind(wells_pts_1, wells_pts_2)
st_crs(wells_pts)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# now let's load geoophysical lines
geoph_lines <- st_read("BCOGC_data/shapefiles/PASR_GEOPHYSICAL_LN_subset.shp")
## Reading layer `PASR_GEOPHYSICAL_LN_subset' from data source `C:\Users\Paulina-laptop\Desktop\R_programming\RLadies_workshop\BCOGC_data\shapefiles\PASR_GEOPHYSICAL_LN_subset.shp' using driver `ESRI Shapefile'
## Simple feature collection with 49710 features and 19 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 1266576 ymin: 1173555 xmax: 1374678 ymax: 1265680
## Projected CRS: NAD83 / BC Albers
# we will focus only on the lines within Dawson Area
geoph_lines_dawson <- filter(geoph_lines, PROG_NAME %in% c("DAWSON 3D","DAWSON 3D EXTENSION"))
# let's set the same crs as the wells
geoph_lines_dawson <- st_transform(geoph_lines_dawson, st_crs(wells_pts))

# merge the lines and create the polygon to filter the wells
dawson_area <- st_union(geoph_lines_dawson) %>% 
  st_convex_hull()
## although coordinates are longitude/latitude, st_union assumes that they are planar
# filter the wells
wells_dawson <- wells_pts %>%
  filter(st_within(x = ., y = dawson_area, sparse = FALSE))
## although coordinates are longitude/latitude, st_within assumes that they are planar
# plot together
ggplot() + 
  geom_sf(data = wells_dawson, size=1, alpha=0.2, colour='blue') + 
  geom_sf(data = geoph_lines_dawson, size=0.1)

names(wells_dawson)
## [1] "Surf.UTM.Zone.Num" "Well.Area.Name"    "Oper.Abbrev"      
## [4] "Surf.Owner"        "geometry"
# let's add O&G pools, transform crs and find the polls intersecting Dawson area
pools <- st_read("BCOGC_data/shapefiles/POOL_CONTOURS_LN.shp")
## Reading layer `POOL_CONTOURS_LN' from data source `C:\Users\Paulina-laptop\Desktop\R_programming\RLadies_workshop\BCOGC_data\shapefiles\POOL_CONTOURS_LN.shp' using driver `ESRI Shapefile'
## Simple feature collection with 7963 features and 12 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 1091006 ymin: 1070402 xmax: 1386930 ymax: 1679933
## Projected CRS: NAD83 / BC Albers
# get the production pools intersecting the Dawson area only
pools <- st_transform(pools, st_crs(wells_dawson))
pools_dawson <- pools %>%
  filter(st_intersects(x = ., y = dawson_area, sparse = FALSE))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# plot it
ggplot() + 
  geom_sf(data = wells_dawson, alpha=0.2) + 
  geom_sf(data = geoph_lines_dawson, alpha=0.2) + 
  geom_sf(data = pools_dawson, aes(colour=FLUID_TYPE)) +
  scale_color_discrete(name = "Producing pools") +
  xlab("Longitude") + 
  ylab("Latitude")

# Let's add one more information:O&G fields. Transform to the crs of the wells
fields <- st_read("BCOGC_data/shapefiles/FIELDS_PY.shp")
## Reading layer `FIELDS_PY' from data source `C:\Users\Paulina-laptop\Desktop\R_programming\RLadies_workshop\BCOGC_data\shapefiles\FIELDS_PY.shp' using driver `ESRI Shapefile'
## Simple feature collection with 570 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1079392 ymin: 1066850 xmax: 1387498 ymax: 1680540
## Projected CRS: NAD83 / BC Albers
ggplot() + 
  geom_sf(data = fields)

fields <- st_transform(fields, st_crs(wells_dawson))

# let's focus on the fields in the smaller area (crop the layer)
fields <- st_crop(fields, xmin = -121, xmax = -120, ymin = 55.7, ymax = 56)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
unique(fields$FLDRNM)
##  [1] "MICA"           "PARKLAND"       "DOE"            "DAWSON CREEK"  
##  [5] "GROUNDBIRCH"    "SATURN"         "SUNSET PRAIRIE" "BRIAR RIDGE"   
##  [9] "BRASSEY"        "TOWER LAKE"     "SUNDOWN"        "SUNRISE"
# let's visualize only Dawson field 
field_dawson <- fields %>% filter(FLDRNM == "DAWSON CREEK")

# let's plot it together 
ggplot() + 
  geom_sf(data = wells_dawson, alpha=0.2) +
  geom_sf(data = geoph_lines_dawson, alpha=0.2) +
  geom_sf(data = pools, aes(colour=FLUID_TYPE)) +
  geom_sf(data = field_dawson, fill=NA, linetype="dashed", colour="blue", size=0.2) +
  scale_color_discrete(name = "Producing pools") +
  coord_sf(crs = 4326, xlim = c(-120.5,-120), ylim = c(55.7, 56)) + 
  xlab("Longitude") + 
  ylab("Latitude")